# %% Read every POI extension training set and save the names as txt files
import pandas as pd
import os
import csv
from tqdm import tqdm


# Load the Dianping restaurant table; this cell only uses its "name" column.
yf_dianping_df = pd.read_csv("./extend_data/yf_dianping/restaurants.csv")
# %%
# Output directory that the export cells of this script write into.
folder = "./extend_poi_dataset"
filename = "yf_dianping.txt"
# exist_ok=True creates the directory when it is missing and is a no-op when
# it already exists, avoiding a separate (racy) check-then-create step.
os.makedirs(folder, exist_ok=True)
# Write one restaurant name per line; rows with a missing name are dropped.
with open(os.path.join(folder, filename), "w", encoding="utf-8") as f:
    for poi in tqdm(yf_dianping_df["name"].dropna()):
        f.write(poi + "\n")

# %%
import json

# Load the raw POI records from the JSON dump.
# The encoding is pinned to UTF-8 (the encoding RFC 8259 prescribes for JSON
# interchange) so the read no longer depends on the platform's default locale
# encoding. NOTE(review): assumes the file on disk is UTF-8 - confirm.
# strict=False lets the decoder accept raw control characters inside strings.
with open("./extend_data/地图兴趣点POI数据.json", "r", encoding="utf-8") as f:
    data = json.load(f, strict=False)

# %%
# Dump the "name" field of every loaded POI record, one per line, into
# poi.json.txt inside the output folder.
filename = "poi.json.txt"
out_path = os.path.join(folder, filename)
with open(out_path, "w", encoding="utf-8") as out_file:
    out_file.writelines(record["name"] + "\n" for record in tqdm(data))
# %%
# Export the "name" column of every file in the 2018 POI CSV folder to a text
# file (one name per line) in the output folder. A ".csv" in the file name is
# replaced by ".txt" to form the output name.
poi_folder = "./extend_data/2018-POICSV"

for filename in os.listdir(poi_folder):
    print(filename)
    # The skip check has to test the path that is actually written (the .txt
    # name). Testing the source file name never matches for ".csv" inputs, so
    # already-exported files would be regenerated on every run.
    # NOTE: an interrupted run leaves a partial .txt behind that will then be
    # skipped; delete it to force regeneration.
    out_path = os.path.join(folder, filename.replace(".csv", ".txt"))
    if os.path.exists(out_path):
        continue
    # Input is decoded as GBK; QUOTE_NONE disables quote handling so quote
    # characters in a field are read as ordinary data.
    df = pd.read_csv(os.path.join(poi_folder, filename), encoding="gbk", quoting=csv.QUOTE_NONE)
    with open(out_path, "w", encoding="utf-8") as f:
        for name in tqdm(df["name"].dropna(), desc=filename):
            if name != "":
                f.write(name + "\n")


    
# %%
